from pyspark import SparkConf, SparkContext

if __name__ == '__main__':
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    sc = SparkContext(conf=conf)

    rdd1 = sc.parallelize([('a', 1), ('a', 1), ('b', 1), ('b', 1), ('b', 1)])

    # [('b', [1, 1, 1]), ('a', [1, 1])]
    print(rdd1.groupByKey().map(lambda x: (x[0], list(x[1]))).collect())
